Irene Jiang and Yizhou Yu
Original data: https://www.cell.com/cell-reports/pdf/S2211-1247(22)00769-0.pdf
Summary of the workflow:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
from anndata import AnnData
import bbknn
import gseapy
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu
from scipy.stats import t
from statsmodels.stats import multitest
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor='white')
scanpy==1.9.3 anndata==0.9.1 umap==0.5.3 numpy==1.23.5 scipy==1.10.0 pandas==1.5.3 scikit-learn==1.2.1 statsmodels==0.13.5 python-igraph==0.10.6 pynndescent==0.5.10
results_file = 'single_cell_test_1.h5ad' # the file that will store the analysis results
adata1 = sc.read_10x_mtx(
r"C:\Users\irene\Tuberculosis_Research\RNA Sequencing Analysis\uninfected_rep1", # the directory with the `.mtx` file
var_names='gene_symbols', # use gene symbols for the variable names (variables-axis index)
cache=True) # write a cache file for faster subsequent reading
... reading from cache file cache\C-Users-irene-Tuberculosis_Research-RNA Sequencing Analysis-uninfected_rep1-matrix.h5ad
adata2 = sc.read_10x_mtx(
r"C:\Users\irene\Tuberculosis_Research\RNA Sequencing Analysis\uninfected_rep2", # the directory with the `.mtx` file
var_names='gene_symbols', # use gene symbols for the variable names (variables-axis index)
cache=True) # write a cache file for faster subsequent reading
... reading from cache file cache\C-Users-irene-Tuberculosis_Research-RNA Sequencing Analysis-uninfected_rep2-matrix.h5ad
adata3 = sc.read_10x_mtx(
r"C:\Users\irene\Tuberculosis_Research\RNA Sequencing Analysis\infected_d50_rep1", # the directory with the `.mtx` file
var_names='gene_symbols', # use gene symbols for the variable names (variables-axis index)
cache=True) # write a cache file for faster subsequent reading
... reading from cache file cache\C-Users-irene-Tuberculosis_Research-RNA Sequencing Analysis-infected_d50_rep1-matrix.h5ad
adata4 = sc.read_10x_mtx(
r"C:\Users\irene\Tuberculosis_Research\RNA Sequencing Analysis\infected_d50_rep2", # the directory with the `.mtx` file
var_names='gene_symbols', # use gene symbols for the variable names (variables-axis index)
cache=True) # write a cache file for faster subsequent reading
... reading from cache file cache\C-Users-irene-Tuberculosis_Research-RNA Sequencing Analysis-infected_d50_rep2-matrix.h5ad
adata5 = sc.read_10x_mtx(
r"C:\Users\irene\Tuberculosis_Research\RNA Sequencing Analysis\infected_d50_rep3", # the directory with the `.mtx` file
var_names='gene_symbols', # use gene symbols for the variable names (variables-axis index)
cache=True) # write a cache file for faster subsequent reading
... reading from cache file cache\C-Users-irene-Tuberculosis_Research-RNA Sequencing Analysis-infected_d50_rep3-matrix.h5ad
adata3.obs['condition'] = "infected"
adata3.obs['replicate'] = "1"
adata4.obs['condition'] = "infected"
adata4.obs['replicate'] = "2"
adata5.obs['condition'] = "infected"
adata5.obs['replicate'] = "3"
adata1.obs['condition'] = "uninfected"
adata1.obs['replicate'] = "1"
adata2.obs['condition'] = "uninfected"
adata2.obs['replicate'] = "2"
adatas = [adata1, adata2,adata3, adata4, adata5]
adata = anndata.concat(adatas)
C:\Users\irene\anaconda3\lib\site-packages\anndata\_core\anndata.py:1830: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
utils.warn_names_duplicates("obs")
adata.var = adata1.var
adata.var_names_make_unique() # this is unnecessary if using `var_names='gene_ids'` in `sc.read_10x_mtx`
adata
AnnData object with n_obs × n_vars = 25227 × 27998
obs: 'condition', 'replicate'
var: 'gene_ids', 'feature_types'
sc.pl.highest_expr_genes(adata, n_top=20, )
normalizing counts per cell
finished (0:00:00)
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)
filtered out 1267 cells that have less than 200 genes expressed
C:\Users\irene\anaconda3\lib\site-packages\anndata\_core\anndata.py:1830: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
utils.warn_names_duplicates("obs")
filtered out 11214 genes that are detected in less than 3 cells
C:\Users\irene\anaconda3\lib\site-packages\anndata\_core\anndata.py:1830: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
utils.warn_names_duplicates("obs")
adata.var['mt'] = adata.var_names.str.startswith('mt-') # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
jitter=0.4, multi_panel=True)
sc.pl.scatter(adata, x='total_counts', y='pct_counts_mt')
sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')
adata = adata[adata.obs.n_genes_by_counts < 4000, :]
adata = adata[adata.obs.pct_counts_mt < 20, :]
sc.pp.normalize_total(adata, target_sum=1e4)
normalizing counts per cell
C:\Users\irene\anaconda3\lib\site-packages\scanpy\preprocessing\_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy.
view_to_actual(adata)
C:\Users\irene\anaconda3\lib\site-packages\anndata\_core\anndata.py:1830: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
utils.warn_names_duplicates("obs")
finished (0:00:00)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
sc.pl.highly_variable_genes(adata)
adata.raw = adata
adata = adata[:, adata.var.highly_variable]
sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])
regressing out ['total_counts', 'pct_counts_mt']
sparse input is densified and may lead to high memory use
C:\Users\irene\anaconda3\lib\site-packages\anndata\_core\anndata.py:1830: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
utils.warn_names_duplicates("obs")
finished (0:00:40)
sc.pp.scale(adata, max_value=10)
sc.tl.pca(adata, svd_solver='arpack')
computing PCA
on highly variable genes
with n_comps=50
finished (0:00:02)
sc.pl.pca(adata, color = 'condition')
#coloring by expression level
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
sc.pl.pca_variance_ratio(adata, log=True)
adata.write(results_file)
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
computing neighbors
using 'X_pca' with n_pcs = 40
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:27)
sc.tl.umap(adata)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:20)
sc.pl.umap(adata, color=['Malat1', 'Hbb-bs', 'Gm42418','condition'])
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
sc.tl.leiden(adata, resolution = 0.1, key_added = "leiden_0.1")
sc.pl.umap(adata, color=['leiden_0.1'])
running Leiden clustering
finished: found 10 clusters and added
'leiden_0.1', the cluster labels (adata.obs, categorical) (0:00:01)
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
sc.tl.rank_genes_groups(adata, 'leiden_0.1', method='t-test')
sc.pl.rank_genes_groups(adata, n_genes=10, sharey=False)
ranking genes
finished: added to `.uns['rank_genes_groups']`
'names', sorted np.recarray to be indexed by group ids
'scores', sorted np.recarray to be indexed by group ids
'logfoldchanges', sorted np.recarray to be indexed by group ids
'pvals', sorted np.recarray to be indexed by group ids
'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:09)
adata.write(results_file)
b_cells = ['Cd19','Cd79a','Ms4a1']
cd4_t_cells = ['Cd4', 'Cd3e']
cd8_t_cells = ['Cd8a', 'Cd3e']
nk_cells = ['Nkg7','Klrd1','Tyrobp']
active_t_cells = ['Cd3e', 'Ccl5', 'Cx3cr1']
endothelial_cells = ['Egfl7', 'Epas1', 'Ramp2']
sc.pl.umap(adata,color = b_cells, ncols = 3)
sc.pl.umap(adata,color = cd4_t_cells, ncols = 3)
sc.pl.umap(adata,color = cd8_t_cells, ncols = 3)
sc.pl.umap(adata,color = nk_cells, ncols = 3)
sc.pl.umap(adata,color = active_t_cells, ncols = 3)
sc.pl.umap(adata,color = endothelial_cells, ncols = 3)
new_cluster_names = [
'B cells',
'CD4 T cells', 'CD8 T cells',
'NK cells','Active T cells','Endothelial cells','outliers1','outlier2', 'outlier3', 'outlier4']
adata.rename_categories('leiden_0.1', new_cluster_names)
adata.obs
| condition | replicate | n_genes | n_genes_by_counts | total_counts | total_counts_mt | pct_counts_mt | leiden_0.1 | |
|---|---|---|---|---|---|---|---|---|
| AAACCTGAGCAATATG-1 | uninfected | 1 | 1182 | 1182 | 1955.0 | 15.0 | 0.767263 | Endothelial cells |
| AAACCTGAGCATCATC-1 | uninfected | 1 | 1916 | 1916 | 6060.0 | 224.0 | 3.696370 | CD4 T cells |
| AAACCTGCAAGTACCT-1 | uninfected | 1 | 1740 | 1740 | 3536.0 | 60.0 | 1.696833 | NK cells |
| AAACCTGCAGAGTGTG-1 | uninfected | 1 | 1301 | 1301 | 3279.0 | 106.0 | 3.232693 | B cells |
| AAACCTGCAGATCGGA-1 | uninfected | 1 | 953 | 953 | 1815.0 | 61.0 | 3.360882 | B cells |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| TTTGTCATCAGTTCGA-1 | infected | 3 | 1846 | 1846 | 6657.0 | 190.0 | 2.854138 | CD8 T cells |
| TTTGTCATCCAAACAC-1 | infected | 3 | 1845 | 1845 | 4615.0 | 40.0 | 0.866739 | NK cells |
| TTTGTCATCGTAGGTT-1 | infected | 3 | 1378 | 1378 | 2985.0 | 90.0 | 3.015075 | NK cells |
| TTTGTCATCTGCTGTC-1 | infected | 3 | 1569 | 1569 | 3451.0 | 60.0 | 1.738626 | NK cells |
| TTTGTCATCTTGGGTA-1 | infected | 3 | 1484 | 1484 | 3931.0 | 104.0 | 2.645637 | CD4 T cells |
23823 rows × 8 columns
sc.pl.umap(adata, color='leiden_0.1', legend_loc='on data', title='', frameon=False, save='.pdf')
WARNING: saving figure to file figures\umap.pdf
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
sc.pl.umap(adata, color='condition', title='', frameon=False, save='_condition.pdf')
WARNING: saving figure to file figures\umap_condition.pdf
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
marker_genes = b_cells + cd4_t_cells + cd8_t_cells + nk_cells + active_t_cells + endothelial_cells
sc.pl.dotplot(adata, marker_genes, groupby='leiden_0.1');
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored dot_ax.scatter(x, y, **kwds)
sc.tl.rank_genes_groups(adata, 'leiden_0.1', method='wilcoxon', key_added="wilcoxon")
sc.tl.rank_genes_groups(adata, 'leiden_0.1', method='t-test', key_added="t-test")
ranking genes
finished: added to `.uns['wilcoxon']`
'names', sorted np.recarray to be indexed by group ids
'scores', sorted np.recarray to be indexed by group ids
'logfoldchanges', sorted np.recarray to be indexed by group ids
'pvals', sorted np.recarray to be indexed by group ids
'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:28)
ranking genes
finished: added to `.uns['t-test']`
'names', sorted np.recarray to be indexed by group ids
'scores', sorted np.recarray to be indexed by group ids
'logfoldchanges', sorted np.recarray to be indexed by group ids
'pvals', sorted np.recarray to be indexed by group ids
'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:02)
#compare cluster1 genes, only stores top 100 by default
wc = sc.get.rank_genes_groups_df(adata, group='NK cells', key='wilcoxon', pval_cutoff=0.01, log2fc_min=0)
tt = sc.get.rank_genes_groups_df(adata, group='NK cells', key='t-test', pval_cutoff=0.01, log2fc_min=0)
tt.head()
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | Fcer1g | 241.670776 | 7.981099 | 0.0 | 0.0 |
| 1 | Gzma | 240.246628 | 9.004680 | 0.0 | 0.0 |
| 2 | Tyrobp | 224.478134 | 7.450509 | 0.0 | 0.0 |
| 3 | Ccl5 | 199.512817 | 7.259755 | 0.0 | 0.0 |
| 4 | Ncr1 | 187.117294 | 9.758260 | 0.0 | 0.0 |
target_genes = ['Usp24', 'Ccdc122', 'Pdp1', 'Glrx5', 'Slc2a8', 'Lamp1','Zfp575','Klk10', 'Pinlyp']
sc.pl.stacked_violin(adata, target_genes, groupby = 'leiden_0.1')
Cell types: new_cluster_names = [ 'B cells', 'CD4 T cells', 'CD8 T cells', 'NK cells','Active T cells','Endothelial cells','outliers1','outlier2', 'outlier3', 'outlier4']
inf_group
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
dtype=float32)
df = sc.get.obs_df(adata, target_genes+ ['leiden_0.1','condition'], use_raw=True)
def yy_ci (x1,x2,m1,m2):
n1 = x1.size
n2 = x2.size
v1 = np.var(x1)
v2 = np.var(x2)
d = m1/m2
se = np.sqrt(v1/n1 + v2/n2)
df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
lb = d - t.ppf(0.975,df) * se
ub = d + t.ppf(0.975,df) * se
return lb,ub
dge_target = pd.DataFrame()
for celltype in new_cluster_names:
cells = df[df['leiden_0.1'] == celltype]
for gene_interest in target_genes:
inf_group = cells[cells['condition'] == 'infected'][gene_interest].values
control_group = cells[cells['condition'] == 'uninfected'][gene_interest].values
u,p = mannwhitneyu(inf_group,control_group)
inf_mean = np.expm1(np.mean(np.log1p(inf_group)))
control_mean = np.expm1(np.mean(np.log1p(control_group)))
fc = inf_mean / control_mean
lb,ub = yy_ci(inf_group,control_group,inf_mean,control_mean)
dt_out = {'mann U':[u],'fc':[fc],'pval':[p],'cell type':[celltype],"gene":[gene_interest],
'lb':[lb],'ub':[ub]}
dt_out = pd.DataFrame(dt_out)
dge_target = pd.concat([dge_target,dt_out])
dge_target['adj_p'] = multitest.multipletests(dge_target['pval'])[1]
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars fc = inf_mean / control_mean C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars d = m1/m2 C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1))) C:\Users\irene\anaconda3\lib\site-packages\statsmodels\stats\multitest.py:177: RuntimeWarning: divide by zero encountered in log1p np.log1p(-pvals))
dge_target[dge_target['pval'] < 0.05]
| mann U | fc | pval | cell type | gene | lb | ub | adj_p | |
|---|---|---|---|---|---|---|---|---|
| 0 | 5824602.0 | 1.848775 | 2.704393e-03 | B cells | Ccdc122 | 1.841637 | 1.855914 | 2.034591e-01 |
| 0 | 5547269.5 | 0.281678 | 1.484215e-19 | B cells | Glrx5 | 0.268484 | 0.294872 | 1.320952e-17 |
| 0 | 2853545.5 | 0.285144 | 1.739346e-20 | CD4 T cells | Glrx5 | 0.269105 | 0.301184 | 1.565411e-18 |
| 0 | 3060867.0 | 1.780395 | 3.261525e-03 | CD4 T cells | Slc2a8 | 1.770395 | 1.790395 | 2.374971e-01 |
| 0 | 2499920.0 | 1.416703 | 2.322545e-02 | CD8 T cells | Pdp1 | 1.405883 | 1.427524 | 8.509469e-01 |
| 0 | 2346440.0 | 0.387782 | 1.474287e-12 | CD8 T cells | Glrx5 | 0.372499 | 0.403066 | 1.282630e-10 |
| 0 | 1654812.5 | 0.256565 | 1.764245e-18 | NK cells | Glrx5 | 0.237034 | 0.276096 | 1.552536e-16 |
| 0 | 1718311.5 | 0.787590 | 3.090018e-02 | NK cells | Lamp1 | 0.760010 | 0.815171 | 9.188141e-01 |
| 0 | 129079.0 | 0.216788 | 3.292066e-08 | Active T cells | Glrx5 | 0.165980 | 0.267596 | 2.831173e-06 |
| 0 | 9111.0 | 0.176376 | 7.668835e-04 | outliers1 | Glrx5 | 0.097916 | 0.254835 | 6.312940e-02 |
| 0 | 1555.0 | 0.182470 | 3.356609e-03 | outlier2 | Glrx5 | 0.070642 | 0.294299 | 2.409631e-01 |
dge_target[dge_target['adj_p'] < 0.05]
| mann U | fc | pval | cell type | gene | lb | ub | adj_p | |
|---|---|---|---|---|---|---|---|---|
| 0 | 5547269.5 | 0.281678 | 1.484215e-19 | B cells | Glrx5 | 0.268484 | 0.294872 | 1.320952e-17 |
| 0 | 2853545.5 | 0.285144 | 1.739346e-20 | CD4 T cells | Glrx5 | 0.269105 | 0.301184 | 1.565411e-18 |
| 0 | 2346440.0 | 0.387782 | 1.474287e-12 | CD8 T cells | Glrx5 | 0.372499 | 0.403066 | 1.282630e-10 |
| 0 | 1654812.5 | 0.256565 | 1.764245e-18 | NK cells | Glrx5 | 0.237034 | 0.276096 | 1.552536e-16 |
| 0 | 129079.0 | 0.216788 | 3.292066e-08 | Active T cells | Glrx5 | 0.165980 | 0.267596 | 2.831173e-06 |
fig, ax = plt.subplots(1,1)
data=[[0.28,1.32e-17,"B cells"],
[0.29,1.56e-18,"CD4 T cells"],
[0.39,1.28e-10,"CD8 T cells"],
[0.26,1.55e-16,"NK cells"],
[0.22,2.83e-06,"Active T cells"]]
column_labels=["Fold Change", "Adj Pval", "Cell Type"]
#creating a 2-dimensional dataframe out of the given data
df=pd.DataFrame(data,columns=column_labels)
ax.axis('tight') #turns off the axis lines and labels
ax.axis('off') #changes x and y axis limits such that all data is shown
table = ax.table(cellText=df.values,
colLabels=df.columns,
colColours =["lightcoral"] * 3,
loc="center")
table.set_fontsize(14)
table.scale(1,2)
plt.show()
dge_pval = dge_target[dge_target['pval'] < 0.05]
fig, ax = plt.subplots(nrows=1, sharex=True, sharey=True, figsize=(4, 2), dpi=500)
dge_pval['label'] = dge_pval['gene'].astype(str) + ' in ' + dge_pval['cell type'].astype(str)
dge_pval = dge_pval.sort_values(by=['fc'])
for idx, row in dge_pval.iterrows():
ci = row['ub'] - (row['lb'] + row['ub'])/2
if row['adj_p'] < 0.05:
plt.errorbar(x=[row['fc']], y=[row['label']], xerr=ci,
ecolor='tab:red', capsize=2.5, linestyle='None', linewidth=0.75, marker="o",
markersize=2.5, mfc="tab:red", mec="tab:red")
else:
plt.errorbar(x=[row['fc']], y=[row['label']], xerr=ci,
ecolor='tab:gray', capsize=2.5, linestyle='None', linewidth=0.75, marker="o",
markersize=2, mfc="tab:gray", mec="tab:gray")
ax.tick_params(axis='x', labelsize=5)
ax.tick_params(axis='y', labelsize=4)
#plt.tick_params(axis='both', which='major', labelsize=8)
plt.xlabel('Fold change and 95% Confidence Interval', fontsize=6)
plt.tight_layout()
plt.savefig('forest_plot.png')
plt.show()
genes1 = sc.get.rank_genes_groups_df(cl1_sub, group='infected', key='wilcoxon')['names'][:20]
genes2 = sc.get.rank_genes_groups_df(cl1_sub, group='uninfected', key='wilcoxon')['names'][:20]
genes = genes1.tolist() + genes2.tolist()
sc.pl.dotplot(cl1_sub,genes, groupby='condition')
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored dot_ax.scatter(x, y, **kwds)
#Available databases : ‘Human’, ‘Mouse’, ‘Yeast’, ‘Fly’, ‘Fish’, ‘Worm’
gene_set_names = gseapy.get_library_name(organism='Human')
print(gene_set_names)
['ARCHS4_Cell-lines', 'ARCHS4_IDG_Coexp', 'ARCHS4_Kinases_Coexp', 'ARCHS4_TFs_Coexp', 'ARCHS4_Tissues', 'Achilles_fitness_decrease', 'Achilles_fitness_increase', 'Aging_Perturbations_from_GEO_down', 'Aging_Perturbations_from_GEO_up', 'Allen_Brain_Atlas_10x_scRNA_2021', 'Allen_Brain_Atlas_down', 'Allen_Brain_Atlas_up', 'Azimuth_Cell_Types_2021', 'BioCarta_2013', 'BioCarta_2015', 'BioCarta_2016', 'BioPlanet_2019', 'BioPlex_2017', 'CCLE_Proteomics_2020', 'CORUM', 'COVID-19_Related_Gene_Sets', 'COVID-19_Related_Gene_Sets_2021', 'Cancer_Cell_Line_Encyclopedia', 'CellMarker_Augmented_2021', 'ChEA_2013', 'ChEA_2015', 'ChEA_2016', 'ChEA_2022', 'Chromosome_Location', 'Chromosome_Location_hg19', 'ClinVar_2019', 'DSigDB', 'Data_Acquisition_Method_Most_Popular_Genes', 'DepMap_WG_CRISPR_Screens_Broad_CellLines_2019', 'DepMap_WG_CRISPR_Screens_Sanger_CellLines_2019', 'Descartes_Cell_Types_and_Tissue_2021', 'Diabetes_Perturbations_GEO_2022', 'DisGeNET', 'Disease_Perturbations_from_GEO_down', 'Disease_Perturbations_from_GEO_up', 'Disease_Signatures_from_GEO_down_2014', 'Disease_Signatures_from_GEO_up_2014', 'DrugMatrix', 'Drug_Perturbations_from_GEO_2014', 'Drug_Perturbations_from_GEO_down', 'Drug_Perturbations_from_GEO_up', 'ENCODE_Histone_Modifications_2013', 'ENCODE_Histone_Modifications_2015', 'ENCODE_TF_ChIP-seq_2014', 'ENCODE_TF_ChIP-seq_2015', 'ENCODE_and_ChEA_Consensus_TFs_from_ChIP-X', 'ESCAPE', 'Elsevier_Pathway_Collection', 'Enrichr_Libraries_Most_Popular_Genes', 'Enrichr_Submissions_TF-Gene_Coocurrence', 'Enrichr_Users_Contributed_Lists_2020', 'Epigenomics_Roadmap_HM_ChIP-seq', 'FANTOM6_lncRNA_KD_DEGs', 'GO_Biological_Process_2013', 'GO_Biological_Process_2015', 'GO_Biological_Process_2017', 'GO_Biological_Process_2017b', 'GO_Biological_Process_2018', 'GO_Biological_Process_2021', 'GO_Biological_Process_2023', 'GO_Cellular_Component_2013', 'GO_Cellular_Component_2015', 'GO_Cellular_Component_2017', 'GO_Cellular_Component_2017b', 'GO_Cellular_Component_2018', 'GO_Cellular_Component_2021', 'GO_Cellular_Component_2023', 'GO_Molecular_Function_2013', 'GO_Molecular_Function_2015', 'GO_Molecular_Function_2017', 'GO_Molecular_Function_2017b', 'GO_Molecular_Function_2018', 'GO_Molecular_Function_2021', 'GO_Molecular_Function_2023', 'GTEx_Aging_Signatures_2021', 'GTEx_Tissue_Expression_Down', 'GTEx_Tissue_Expression_Up', 'GTEx_Tissues_V8_2023', 'GWAS_Catalog_2019', 'GWAS_Catalog_2023', 'GeDiPNet_2023', 'GeneSigDB', 'Gene_Perturbations_from_GEO_down', 'Gene_Perturbations_from_GEO_up', 'Genes_Associated_with_NIH_Grants', 'Genome_Browser_PWMs', 'GlyGen_Glycosylated_Proteins_2022', 'HDSigDB_Human_2021', 'HDSigDB_Mouse_2021', 'HMDB_Metabolites', 'HMS_LINCS_KinomeScan', 'HomoloGene', 'HuBMAP_ASCT_plus_B_augmented_w_RNAseq_Coexpression', 'HuBMAP_ASCTplusB_augmented_2022', 'HumanCyc_2015', 'HumanCyc_2016', 'Human_Gene_Atlas', 'Human_Phenotype_Ontology', 'IDG_Drug_Targets_2022', 'InterPro_Domains_2019', 'Jensen_COMPARTMENTS', 'Jensen_DISEASES', 'Jensen_TISSUES', 'KEA_2013', 'KEA_2015', 'KEGG_2013', 'KEGG_2015', 'KEGG_2016', 'KEGG_2019_Human', 'KEGG_2019_Mouse', 'KEGG_2021_Human', 'KOMP2_Mouse_Phenotypes_2022', 'Kinase_Perturbations_from_GEO_down', 'Kinase_Perturbations_from_GEO_up', 'L1000_Kinase_and_GPCR_Perturbations_down', 'L1000_Kinase_and_GPCR_Perturbations_up', 'LINCS_L1000_CRISPR_KO_Consensus_Sigs', 'LINCS_L1000_Chem_Pert_Consensus_Sigs', 'LINCS_L1000_Chem_Pert_down', 'LINCS_L1000_Chem_Pert_up', 'LINCS_L1000_Ligand_Perturbations_down', 'LINCS_L1000_Ligand_Perturbations_up', 'Ligand_Perturbations_from_GEO_down', 'Ligand_Perturbations_from_GEO_up', 'MAGMA_Drugs_and_Diseases', 'MAGNET_2023', 'MCF7_Perturbations_from_GEO_down', 'MCF7_Perturbations_from_GEO_up', 'MGI_Mammalian_Phenotype_2013', 'MGI_Mammalian_Phenotype_2017', 'MGI_Mammalian_Phenotype_Level_3', 'MGI_Mammalian_Phenotype_Level_4', 'MGI_Mammalian_Phenotype_Level_4_2019', 'MGI_Mammalian_Phenotype_Level_4_2021', 'MSigDB_Computational', 'MSigDB_Hallmark_2020', 'MSigDB_Oncogenic_Signatures', 'Metabolomics_Workbench_Metabolites_2022', 'Microbe_Perturbations_from_GEO_down', 'Microbe_Perturbations_from_GEO_up', 'Mouse_Gene_Atlas', 'NCI-60_Cancer_Cell_Lines', 'NCI-Nature_2015', 'NCI-Nature_2016', 'NIH_Funded_PIs_2017_AutoRIF_ARCHS4_Predictions', 'NIH_Funded_PIs_2017_GeneRIF_ARCHS4_Predictions', 'NIH_Funded_PIs_2017_Human_AutoRIF', 'NIH_Funded_PIs_2017_Human_GeneRIF', 'NURSA_Human_Endogenous_Complexome', 'OMIM_Disease', 'OMIM_Expanded', 'Old_CMAP_down', 'Old_CMAP_up', 'Orphanet_Augmented_2021', 'PFOCR_Pathways', 'PFOCR_Pathways_2023', 'PPI_Hub_Proteins', 'PanglaoDB_Augmented_2021', 'Panther_2015', 'Panther_2016', 'Pfam_Domains_2019', 'Pfam_InterPro_Domains', 'PheWeb_2019', 'PhenGenI_Association_2021', 'Phosphatase_Substrates_from_DEPOD', 'ProteomicsDB_2020', 'Proteomics_Drug_Atlas_2023', 'RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO', 'RNAseq_Automatic_GEO_Signatures_Human_Down', 'RNAseq_Automatic_GEO_Signatures_Human_Up', 'RNAseq_Automatic_GEO_Signatures_Mouse_Down', 'RNAseq_Automatic_GEO_Signatures_Mouse_Up', 'Rare_Diseases_AutoRIF_ARCHS4_Predictions', 'Rare_Diseases_AutoRIF_Gene_Lists', 'Rare_Diseases_GeneRIF_ARCHS4_Predictions', 'Rare_Diseases_GeneRIF_Gene_Lists', 'Reactome_2013', 'Reactome_2015', 'Reactome_2016', 'Reactome_2022', 'SILAC_Phosphoproteomics', 'SubCell_BarCode', 'SynGO_2022', 'SysMyo_Muscle_Gene_Sets', 'TF-LOF_Expression_from_GEO', 'TF_Perturbations_Followed_by_Expression', 'TG_GATES_2020', 'TRANSFAC_and_JASPAR_PWMs', 'TRRUST_Transcription_Factors_2019', 'Table_Mining_of_CRISPR_Studies', 'Tabula_Muris', 'Tabula_Sapiens', 'TargetScan_microRNA', 'TargetScan_microRNA_2017', 'The_Kinase_Library_2023', 'Tissue_Protein_Expression_from_Human_Proteome_Map', 'Tissue_Protein_Expression_from_ProteomicsDB', 'Transcription_Factor_PPIs', 'UK_Biobank_GWAS_v1', 'Virus-Host_PPI_P-HIPSTer_2020', 'VirusMINT', 'Virus_Perturbations_from_GEO_down', 'Virus_Perturbations_from_GEO_up', 'WikiPathway_2021_Human', 'WikiPathways_2013', 'WikiPathways_2015', 'WikiPathways_2016', 'WikiPathways_2019_Human', 'WikiPathways_2019_Mouse', 'dbGaP', 'huMAP', 'lncHUB_lncRNA_Co-Expression', 'miRTarBase_2017']